# 时间：2021年2月28日
# 作者：张志刚

"""数据库数据自动生成

用于gen_employee()用于填充员工表。
工资数据从2015年起开始记录，结束时间为2020年。
"""

import pymysql
from faker import Faker
from datetime import datetime
from pypinyin import lazy_pinyin
from random import choice, randint

class DBClient:
    def __init__(self, host, user, passwd, db):
        self.conn = pymysql.connect(
            host=host,
            user=user,
            password=passwd,
            db=db
        )
        self.cursor = self.conn.cursor()

    def exec_sql(self, sql, data):
        self.cursor.execute(sql, data)

    def commit(self):
        self.conn.commit()

    def close(self):
        self.cursor.close()
        self.conn.close()

def gen_employee(name, dept_id):
    fake = Faker(locale='zh_CN')
    email_suffix = ['@tedu.cn', '@tarena.com']
    d1 = datetime(2000, 12, 31)  # 最年轻的员工生日
    birth_date = fake.date_between(start_date="-50y", end_date=d1)  # 50年前到d1出生
    birth_date = birth_date.strftime('%Y-%m-%d')
    hire_date = fake.date_between(start_date="-20y", end_date="today")  # 20年前公司成立
    hire_date = hire_date.strftime('%Y-%m-%d')
    phone_number = fake.phone_number()  # 电话号码
    email = '%s%s' % (''.join(lazy_pinyin(name)), choice(email_suffix))  # 员工用不同的邮箱
    return (name, hire_date, birth_date, email, phone_number, dept_id)

if __name__ == '__main__':
    # 生成133个不重名的用户
    fake = Faker(locale='zh_CN')
    emp_names = set()
    while len(emp_names) < 133:
        emp_names.add(fake.name())

    # 连接数据库 DBClient(服务器, 用户名, 密码, 数据库名)
    client = DBClient('localhost', 'root', 'tedu.cn', 'tedu_db')

    # 生成员工表数据，本例共133人: [(部门编号, 部门人数), (部门编号, 部门人数)]
    emps = [(1, 8), (2, 5), (3, 6), (4, 55), (5, 12), (6, 9), (7, 35), (8, 3)]
    insert_emp = "INSERT INTO employees (name, hire_date, birth_date, email, phone_number, dept_id) VALUES (%s, %s, %s, %s, %s, %s)"
    for dep_id, n in emps:
        for i in range(n):
            data = gen_employee(emp_names.pop(), dep_id)
            client.exec_sql(insert_emp, data)

    # 生成工资表数据
    insert_sal = "INSERT INTO salary (date, employee_id, basic, bonus) VALUES (%s, %s, %s, %s)"
    # 生成所有员工的基本工资，从5000到20000不等。字典的key是员工id
    emp_salary = dict()
    for i in range(1, 134):
        emp_salary[i] = randint(5, 21) * 1000
    # 生成从2015年到2020年所有员工的工资
    date_range = ('%d-%02d-10' % (i, j) for i in range(2015, 2021) for j in range(1, 13))
    y = 0
    for date in date_range:
        y += 1
        if y == 12:  # 基本工资每年涨5%
            for emp_id in emp_salary:
                emp_salary[emp_id] = int(emp_salary[emp_id] * 1.05)
            y = 0
        for emp_id in emp_salary:
            bonus = randint(1, 11) * 1000  # 奖金从1000到10000不等
            data = (date, emp_id, emp_salary[emp_id], bonus)
            client.exec_sql(insert_sal, data)

    # 清理数据，员工未入职时，不能有工资
    del_sal = "DELETE FROM salary WHERE employee_id=%s AND date < (SELECT hire_date FROM employees WHERE employee_id=%s)"
    for i in range(1, 134):
        client.exec_sql(del_sal, (i, i))

    client.commit()
    client.close()
